import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import visual_behavior_glm
import visual_behavior_glm.GLM_params as glm_params
import visual_behavior_glm.GLM_analysis_tools as gat
import visual_behavior.data_access.loading as loading
%matplotlib inline
%load_ext autoreload
%autoreload 2
# ------------------------------------------------------------------
# Notebook-style script: bare expressions below (len, .unique(), .head())
# rely on the interactive REPL/Jupyter to display their values.
# ------------------------------------------------------------------
# Which dropout metric to pivot on, and which GLM version to load.
model_output_type = 'adj_fraction_change_from_full'
glm_version = '15_events_L2_optimize_by_session'
# Pivoted results: one row per cell/session, one column per dropout regressor.
rspm = gat.build_pivoted_results_summary(value_to_use=model_output_type, results_summary=None,
                                         glm_version=glm_version, cutoff=None)
len(rspm)
# Ai94 data is not in here, thats good
rspm.full_genotype.unique()
# behavior model is still in here, passive change is also included for passive sessions only
rspm.keys()[:100]
rspm.head()
def get_default_features(single=False):
    """Return the default list of GLM dropout features used in this analysis.

    Parameters
    ----------
    single : bool
        If True, return the 'single-<feature>' dropout names instead of the
        standard dropout names.

    Returns
    -------
    list of str
    """
    base = (
        'all-images',
        'omissions',
        'licks',
        'pupil',
        'running',
        # 'face_motion_energy' is intentionally excluded due to QC issues
        'hits',
        'misses',
        'false_alarms',
        'correct_rejects',
        'passive_change',
        'beh_model',
    )
    prefix = 'single-' if single else ''
    return [prefix + name for name in base]
# Select which dropout features to analyze.
features_to_plot = get_default_features(single=False)
# Higher-level grouped dropouts. NOTE: this overrides the assignment above,
# so the get_default_features() result is effectively unused here.
level_up_features = [
    'all-images',
    'omissions',
    'behavioral',
    'task',
]
features_to_plot = level_up_features
# group by cell_specimen_id and session_number
# take average dropout across repeats of a given session number
df = rspm.groupby(['cell_specimen_id', 'session_number']).mean()
df.head()
# restrict to the regressors we care about
df = df[features_to_plot]
df.head()
# unstack to turn session number (the last index) into a column level
df = df.unstack()
df.values.shape
# now there is only one row per cell, with dropouts for all regressors, all sessions
df.head()
# get cell IDS
cell_specimen_ids = df.index.values
len(cell_specimen_ids)
# Heatmap of dropout values: cells x (feature, session_number) columns.
fig, ax = plt.subplots(figsize=(20,20))
ax = sns.heatmap(df, vmin=-1, vmax=1, center=0, cmap='RdBu_r', ax=ax, cbar_kws={'label':model_output_type})
# make smaller plot
fig, ax = plt.subplots(figsize=(6,6))
ax = sns.heatmap(df, vmin=-1, vmax=1, center=0, cmap='RdBu_r', ax=ax, cbar_kws={'label':model_output_type})
ax.set_title('GLM dropouts')
# Keep an unmodified copy before the transformations below.
original_df = df.copy()
# first flip the sign of the dropouts so they are positive
df = df.abs()
# set NaN values to zero
df[df.isnull()] = 0
# look at it again
fig, ax = plt.subplots(figsize=(6,6))
ax = sns.heatmap(df, vmin=-1, vmax=1, center=0, cmap='RdBu', ax=ax, cbar_kws={'label':model_output_type})
ax.set_title('GLM dropouts')
from sklearn.decomposition import PCA
# Columns fed to PCA: (regressor, session_number) pairs from the unstacked df.
features_for_pca = df.columns.values # this is regressors x session numbers
data = df.copy()
data = data.dropna() # shouldnt be any NaNs anymore but lets drop just in case
n_features = len(features_for_pca)
n_components = len(features_for_pca)
pca = PCA(n_components=n_components)
pca_result = pca.fit_transform(data.values)
# Append the first three PC projections per cell for later scatter plots.
# NOTE(review): these extra columns now sit alongside the feature columns in `data`.
data['pc1'] = pca_result[:,0]
data['pc2'] = pca_result[:,1]
data['pc3'] = pca_result[:,2]
print('Explained variation per principal component: {}'.format(pca.explained_variance_ratio_))
# Number of PCs needed to reach 95% cumulative explained variance.
n_pcs_95_pct = np.searchsorted(np.cumsum(pca.explained_variance_ratio_), .95)
print(n_pcs_95_pct, 'PCs explain 95% of the variance')
# plot covariance matrix
fig, ax = plt.subplots(figsize=(5,5))
ax = sns.heatmap(pca.get_covariance(), vmin=-0.05, vmax=0.05, cmap='RdBu_r', ax=ax, square=True,
                 robust=True, cbar_kws={"drawedges": False, "shrink": 0.7, "label": 'covariance'})
ax.set_title('covariance matrix')
ax.set_ylim(0, n_features)
# Center tick labels on each cell (hence the 0.5 offset).
ax.set_xticks(np.arange(0.5, len(features_for_pca)+0.5, 1))
ax.set_xticklabels(features_for_pca, rotation=90);
ax.set_yticklabels(features_for_pca, rotation=0);
# plot exp var per component
fig, ax = plt.subplots(figsize=(4,3))
ax.plot(np.arange(n_components), pca.explained_variance_ratio_, 'o-k')
ax.set_xlabel('PC number')
ax.set_ylabel('variance explained')
ax.set_title('first '+str(n_pcs_95_pct)+' PCs explain >95% of the variance')
# Heatmap of the principal axes (PC weights over features),
# restricted to the PCs that explain 95% of the variance.
fig, ax = plt.subplots(figsize=(5,5))
ax = sns.heatmap(pca.components_[:n_pcs_95_pct], vmin=-1, vmax=1, cmap='RdBu_r', ax=ax, square=True,
                 robust=True, cbar_kws={"drawedges": False, "shrink": 0.5, "label": 'weight'})
ax.set_ylabel('principal components')
ax.set_xlabel('features')
# ax.set_title('principal axes in feature space \n(directions of maximum variance in the data)')
ax.set_ylim(0, n_pcs_95_pct)
ax.set_xticks(np.arange(0.5, len(features_for_pca)+0.5, 1))
ax.set_xticklabels(features_for_pca, rotation=90);
pca_result.shape
# FIX: previously this re-ran pca.fit_transform(data.values) here, but `data`
# now contains the appended pc1/pc2/pc3 columns, so refitting would leak the
# PC projections back into the feature matrix. The clean `pca_result` from the
# original fit is still in scope; just truncate it to the PCs of interest.
pca_result = pca_result[:, :n_pcs_95_pct]
pca_result.shape
# Heatmap of per-cell PC loadings.
fig, ax = plt.subplots(figsize=(5,6))
ax = sns.heatmap(pca_result, cmap='RdBu_r', ax=ax,
                 robust=True, cbar_kws={"drawedges": False, "shrink": 0.7, "label": 'loading'})
ax.set_ylabel('cells')
ax.set_xlabel('PC')
ax.set_title('PCA loadings')
ax.set_ylim(0, pca_result.shape[0])
ax.set_xlim(0, pca_result.shape[1])
ax.set_xticks(np.arange(0, pca_result.shape[1]));
ax.set_xticklabels(np.arange(0, pca_result.shape[1]));
# cluster_divisions = np.where(np.diff(sorted_labels)==1)[0]
# for y in cluster_divisions:
#     ax.hlines(y, xmin=0, xmax=n_components, color='k')
# Attach cre_line metadata to each cell (the merge produces one row per rspm
# entry, so de-duplicate back down to one row per cell).
if 'cre_line' not in data.keys():
    data = data.merge(rspm[['cell_specimen_id', 'cre_line']], on='cell_specimen_id')
    data = data.drop_duplicates(subset='cell_specimen_id')
len(data)
# pc1 and pc2 columns in rspm correspond to pca_results for those PCs - is this the 'score' per cell?
# NOTE(review): both panels plot the same pc1-vs-pc2 scatter; presumably one
# was meant to show a different PC pair or axis limits -- confirm intent.
fig,ax = plt.subplots(1,2,figsize=(12,6))
# Columns are a MultiIndex after unstack, hence the ("pc1", "") tuple keys.
ax[0] = sns.scatterplot(data=data, x=("pc1", ""), y=("pc2", ""), hue="cre_line",
                        palette='hls', legend="full", alpha=0.3, ax=ax[0])
# ax[0].set_xlim(-5,10)
# ax[0].set_ylim(-5,10)
ax[1] = sns.scatterplot(data=data, x=("pc1", ""), y=("pc2", ""), hue="cre_line",
                        palette='hls', legend="full", alpha=0.3, ax=ax[1])
# ax[1].set_xlim(-5,10)
# ax[1].set_ylim(-5,10)
# make a dataframe out of the PCA loadings per cell and add cre line
pca_result_df = pd.DataFrame(pca_result, index=data.index.values)
pca_result_df['cre_line'] = data['cre_line'].values
pca_result_df
# plot data in PCA space again
# Column indices into pca_result_df for the PCs to plot.
PC1 = 0
PC2 = 1
PC3 = 2  # FIX: was 3 -- columns are 0-indexed, so the 3rd PC is column 2
PC4 = 3  # FIX: was 4 for the same off-by-one reason (index 2 was skipped,
         # and index 4 may not even exist when n_pcs_95_pct <= 4)
fig,ax = plt.subplots(1, 3, figsize=(15,5))
ax = ax.ravel()
i=0
ax[i] = sns.scatterplot(data=pca_result_df, x=PC1, y=PC2, hue="cre_line",
                        palette='hls', legend="full", alpha=0.3, ax=ax[i])
# ax[i].set_xlim(-100,100)
# ax[i].set_ylim(-100,100)
i+=1
ax[i] = sns.scatterplot(data=pca_result_df, x=PC2, y=PC3, hue="cre_line",
                        palette='hls', legend="full", alpha=0.3, ax=ax[i])
# ax[i].set_xlim(-100,100)
# ax[i].set_ylim(-100,100)
i+=1
ax[i] = sns.scatterplot(data=pca_result_df, x=PC3, y=PC4, hue="cre_line",
                        palette='hls', legend="full", alpha=0.3, ax=ax[i])
# ax[i].set_xlim(-100,100)
# ax[i].set_ylim(-100,100)
fig.tight_layout()
from sklearn.cluster import KMeans
# Cluster cells by their PC loadings.
X = pca_result # cluster the PCA loadings across cells
kmeans = KMeans(n_clusters=10, random_state=0).fit(X)
labels = kmeans.labels_
kmeans.cluster_centers_.shape
labels = kmeans.labels_
# Sort cells by cluster id so same-cluster rows are adjacent in the heatmap.
sort_order = np.argsort(labels)
sorted_labels = labels[sort_order]
pca_result.shape
fig, ax = plt.subplots(figsize=(5,6))
ax = sns.heatmap(pca_result[sort_order], cmap='RdBu_r', ax=ax,
                 robust=True, cbar_kws={"drawedges": False, "shrink": 0.7, "label": 'loading'})
ax.set_ylabel('cells')
ax.set_xlabel('PC')
ax.set_title('sorted PC loading')
ax.set_ylim(0, pca_result.shape[0])
ax.set_xlim(0, pca_result.shape[1])
ax.set_xticks(np.arange(0, pca_result.shape[1]));
ax.set_xticklabels(np.arange(0, pca_result.shape[1]));
# Draw horizontal lines at cluster boundaries (labels are sorted consecutive
# ints, so boundaries are where the label increments by exactly 1).
cluster_divisions = np.where(np.diff(sorted_labels)==1)[0]
for y in cluster_divisions:
    ax.hlines(y, xmin=0, xmax=n_components, color='k')
# Map each cell to its cluster id and join cell-level metadata.
cluster_df = pd.DataFrame(index=df.index, columns=['cluster_id'], data=labels)
# merge with metadata
metadata = rspm[['cell_specimen_id', 'cre_line', 'imaging_depth', 'targeted_structure', 'location']]
cluster_df = cluster_df.merge(metadata, on='cell_specimen_id')
cluster_df.head()
sns.countplot(data=cluster_df, x='cluster_id', hue='cre_line')
# Count cells per (cre_line, cluster_id) for the pie charts below.
# NOTE(review): after groupby().count(), 'cell_specimen_id' holds counts, not
# ids -- the drop_duplicates on it looks suspicious; verify it is intended.
counts = cluster_df.groupby(['cre_line', 'cluster_id']).count().reset_index()
counts = counts.drop_duplicates(subset=['cell_specimen_id', 'cre_line'])
fig, ax = plt.subplots(1,3, figsize=(12,5))
for i,cre_line in enumerate(np.sort(cluster_df.cre_line.unique())):
    ax[i].pie(counts[counts.cre_line==cre_line].cell_specimen_id.values,
              labels=np.sort(cluster_df.cluster_id.unique()), autopct='%1.1f%%')
    ax[i].set_title(cre_line)
# Attach cluster ids to the full results table.
cluster_ids = pd.DataFrame(index=df.index, columns=['cluster_id'], data=labels)
rspm = rspm.merge(cluster_ids, on='cell_specimen_id')
# For each cluster, look at average dropout for all features, for each session type.
# Per-cluster mean dropout heatmaps (features x session number).
cluster_ids = np.sort(rspm.cluster_id.unique())
n_rows = int(len(cluster_ids)/5.)
fig, ax = plt.subplots(n_rows, 5, figsize=(10,2*n_rows), sharex=True, sharey=True)
ax = ax.ravel()
for i,cluster_id in enumerate(cluster_ids):
    mean_dropouts_for_cluster = rspm[rspm.cluster_id==cluster_id].groupby('session_number').mean()[features_to_plot]
    ax[i] = sns.heatmap(mean_dropouts_for_cluster, cmap='RdBu', vmin=-1, vmax=1, ax=ax[i], cbar=False, cbar_kws={'label':model_output_type})
    ax[i].set_title('cluster_id: '+str(cluster_id))
    # ax[i].set_
fig.tight_layout()
# Per-cell feature x session matrices built from the untransformed dropouts.
arr = original_df.values
arr = np.abs(arr)
arr.shape
# Reshape to (cells, features, sessions); assumes 4 features x 6 session
# numbers in the unstacked column order -- TODO confirm against features_to_plot.
arr = arr.reshape(-1,4,6)
arr.shape
# make NaNs zero
arr[np.isnan(arr)] = 0
# plot dropouts across sessions for one cell
cell_index = 0
plt.imshow(arr[cell_index, :, :], cmap='RdBu', vmin=-1, vmax=1)
plt.xticks(np.arange(0,6,1))
plt.yticks(np.arange(0,len(features_to_plot),1), labels=features_to_plot)
plt.title('csid: '+str(cell_specimen_ids[cell_index]))
plt.colorbar()
# get 100 random cells
inds = len(cell_specimen_ids)*np.random.random_sample(size=100)
random_indices = [int(ind) for ind in inds]
# plot for a bunch of random cells
cell_index = 0
fig, ax = plt.subplots(8,10, figsize=(20,15), sharex=True, sharey=True)
ax = ax.ravel()
for i,cell_index in enumerate(random_indices[:80]):
    ax[i].imshow(arr[cell_index, :, :], cmap='RdBu', vmin=-1, vmax=1)
    ax[i].set_xticks(np.arange(0,6,1))
    ax[i].set_yticks(np.arange(0,len(features_to_plot),1))
    ax[i].set_yticklabels(features_to_plot)
    ax[i].set_title(str(cell_specimen_ids[cell_index]))
    # ax[i].colorbar()
# Observation: cells primarily care about images in specific sessions, with some weak coding for behavioral and task variables.
def plot_PCA_var_explained(pca, n_components, cre_line):
    """Plot the fraction of variance explained by each principal component.

    Parameters
    ----------
    pca : fitted sklearn PCA instance
    n_components : int
        Number of components to show on the x axis.
    cre_line : str
        Included in the plot title.
    """
    cumulative_var = np.cumsum(pca.explained_variance_ratio_)
    # index of the first PC at which cumulative variance reaches 95%
    n_PCs_95pct_var = np.searchsorted(cumulative_var, .95)
    fig, ax = plt.subplots(figsize=(4, 3))
    ax.plot(np.arange(n_components), pca.explained_variance_ratio_, 'o-k')
    ax.set_xlabel('PC number')
    ax.set_ylabel('variance explained')
    ax.set_title(cre_line+', first '+str(n_PCs_95pct_var)+' PCs explain >95% of the variance')
def plot_input_to_PCA(data, cre_line, features_for_pca, model_output_type):
    """Heatmap of the cells x features matrix that is fed into PCA."""
    fig, ax = plt.subplots(figsize=(5,6))
    ax = sns.heatmap(data=data[features_for_pca], cmap='RdBu', ax=ax, vmin=-1, vmax=1,
                     robust=True, cbar_kws={"drawedges": False, "shrink": 0.7, "label": model_output_type})
    ax.set_ylabel('cells')
    ax.set_xlabel('features')
    ax.set_title(cre_line)
    # Force limits so all rows/columns are visible regardless of seaborn defaults.
    ax.set_ylim(0, data.shape[0])
    ax.set_xlim(0, data.shape[1])
    ax.set_xticks(np.arange(0, data.shape[1], 1));
    ax.set_xticklabels(features_for_pca);
def plot_covariance_matrix(pca, features_for_pca, n_features):
    """Heatmap of the feature covariance matrix estimated by the fitted PCA."""
    fig, ax = plt.subplots(figsize=(5,5))
    ax = sns.heatmap(pca.get_covariance(), vmin=-0.05, vmax=0.05, cmap='RdBu_r', ax=ax, square=True,
                     robust=True, cbar_kws={"drawedges": False, "shrink": 0.7, "label": 'covariance'})
    ax.set_title('covariance matrix')
    ax.set_ylim(0, n_features)
    # Center tick labels on each cell (hence the 0.5 offset).
    ax.set_xticks(np.arange(0.5, len(features_for_pca)+0.5, 1))
    ax.set_xticklabels(features_for_pca, rotation=90);
    ax.set_yticklabels(features_for_pca, rotation=0);
def plot_PCs_by_features(pca, n_components, features_to_plot, cre_line, n_pcs_95_pct, ax=None):
    """Heatmap of PC weights (principal axes) over features, restricted to the
    first n_pcs_95_pct components.

    Note: `n_components` is currently unused (kept for call-site compatibility).
    Returns the matplotlib Axes.
    """
    if ax is None:
        fig, ax = plt.subplots(figsize=(5,5))
    ax = sns.heatmap(pca.components_[:n_pcs_95_pct], vmin=-1, vmax=1, cmap='RdBu_r', ax=ax, square=True,
                     robust=True, cbar_kws={"drawedges": False, "shrink": 0.5, "label": 'weight'})
    ax.set_ylabel('principal components')
    ax.set_xlabel('features')
    ax.set_title(cre_line)
    ax.set_ylim(0, n_pcs_95_pct)
    # ax.set_yticks(np.arange(0, n_components, 1))
    # ax.set_yticklabels(np.arange(0.5, n_components+0.5, 1), rotation=90);
    ax.set_xticks(np.arange(0.5, len(features_to_plot)+0.5, 1))
    ax.set_xticklabels(features_to_plot, rotation=90);
    return ax
# pc1 and pc2 columns in rspm correspond to pca_results for those PCs - is this the 'score' per cell?
def plot_data_in_PC_space(pca_result_df, cre_line):
    """Scatter cells in PC space for several PC pairs.

    Expects pca_result_df to have columns 'PC0'..'PC5' -- assumes at least six
    PCs survived the 95%-variance cut; TODO confirm for every cre line.
    """
    # column names for the six PCs plotted below
    PC1 = 'PC0'
    PC2 = 'PC1'
    PC3 = 'PC2'
    PC4 = 'PC3'
    PC5 = 'PC4'
    PC6 = 'PC5'
    fig,ax = plt.subplots(2, 3, figsize=(12,8))
    ax = ax.ravel()
    i=0
    ax[i] = sns.scatterplot(data=pca_result_df, x=PC1, y=PC2, #hue="cre_line",
                            legend="full", alpha=0.3, ax=ax[i])
    i+=1
    ax[i] = sns.scatterplot(data=pca_result_df, x=PC2, y=PC3, #hue="cre_line",
                            legend="full", alpha=0.3, ax=ax[i])
    i+=1
    ax[i] = sns.scatterplot(data=pca_result_df, x=PC3, y=PC4, #hue="cre_line",
                            legend="full", alpha=0.3, ax=ax[i])
    i+=1
    ax[i] = sns.scatterplot(data=pca_result_df, x=PC4, y=PC5, #hue="cre_line",
                            legend="full", alpha=0.3, ax=ax[i])
    i+=1
    ax[i] = sns.scatterplot(data=pca_result_df, x=PC5, y=PC6, #hue="cre_line",
                            legend="full", alpha=0.3, ax=ax[i])
    i+=1
    ax[i] = sns.scatterplot(data=pca_result_df, x=PC1, y=PC6, #hue="cre_line",
                            legend="full", alpha=0.3, ax=ax[i])
    fig.tight_layout()
    fig.suptitle(cre_line, x=0.5, y=1.01)
def plot_PCA_result_heatmap(pca_result, cre_line):
    """Heatmap of per-cell PC projections (cells x PCs)."""
    fig, ax = plt.subplots(figsize=(5,6))
    ax = sns.heatmap(pca_result, cmap='RdBu_r', ax=ax,
                     robust=True, cbar_kws={"drawedges": False, "shrink": 0.7, "label": 'activation'})
    ax.set_ylabel('cells')
    ax.set_xlabel('PC')
    ax.set_title(cre_line)
    ax.set_ylim(0, pca_result.shape[0])
    ax.set_xlim(0, pca_result.shape[1])
    ax.set_xticks(np.arange(0, pca_result.shape[1], 1));
    ax.set_xticklabels(np.arange(0, pca_result.shape[1], 1));
# cluster
def cluster_pca_result_kmeans(pca_result, n_clusters=10):
    """Cluster cells by their PC projections with KMeans.

    random_state=0 makes repeated calls deterministic.

    Returns
    -------
    (fitted KMeans model, array of cluster labels, one per row of pca_result)
    """
    from sklearn.cluster import KMeans
    model = KMeans(n_clusters=n_clusters, random_state=0)
    model.fit(pca_result)
    return model, model.labels_
# plot sorted PC weights
def plot_sorted_PC_weights(pca_result, labels, title=''):
    """Heatmap of per-cell PC projections with rows sorted by cluster label,
    and horizontal lines at cluster boundaries.

    Parameters
    ----------
    pca_result : array, cells x PCs
    labels : array of int cluster labels, one per cell
    title : str, optional
        Plot title. (FIX: the title and the hlines x-extent previously read
        the globals `cre_line` and `n_components`, which are not parameters
        and only resolved by accident inside the notebook loop.)
    """
    sort_order = np.argsort(labels)
    sorted_labels = labels[sort_order]
    fig, ax = plt.subplots(figsize=(5,6))
    ax = sns.heatmap(pca_result[sort_order], cmap='RdBu_r', ax=ax, vmin=-1, vmax=1,
                     robust=True, cbar_kws={"drawedges": False, "shrink": 0.7, "label": 'loading'})
    ax.set_ylabel('cells')
    ax.set_xlabel('PC')
    ax.set_title(title)
    ax.set_ylim(0, pca_result.shape[0])
    ax.set_xlim(0, pca_result.shape[1])
    ax.set_xticks(np.arange(0, pca_result.shape[1]));
    # ax.set_xticklabels(features_for_pca, rotation=90);
    # boundaries: sorted integer labels increment by 1 at each cluster division
    cluster_divisions = np.where(np.diff(sorted_labels)==1)[0]
    for y in cluster_divisions:
        # span the full heatmap width (was the unrelated global n_components)
        ax.hlines(y, xmin=0, xmax=pca_result.shape[1], color='k')
def create_cluster_df(df, rspm, labels):
    """Build a table mapping each cell to its cluster id plus cell metadata.

    Parameters
    ----------
    df : DataFrame indexed by cell_specimen_id (one row per cell)
    rspm : results summary containing the metadata columns used below
    labels : cluster label per row of df

    Returns
    -------
    DataFrame with one row per (cell_specimen_id, cre_line), containing
    cluster_id and the metadata columns.
    """
    # one row per cell: cell_specimen_id (index) -> cluster_id
    clusters = pd.DataFrame({'cluster_id': labels}, index=df.index)
    meta_cols = ['cell_specimen_id', 'cre_line', 'imaging_depth', 'targeted_structure', 'location']
    # merge duplicates rows (one per rspm entry); collapse back to unique cells
    merged = clusters.merge(rspm[meta_cols], on='cell_specimen_id')
    merged = merged.drop_duplicates(subset=['cell_specimen_id', 'cre_line'])
    return merged
def plot_cluster_pie_for_cre_line(cluster_df, cre_line):
    """Pie chart of the fraction of cells in each cluster for one cre line."""
    # count() fills every column with group sizes; cell_specimen_id serves as the count
    counts = cluster_df.groupby(['cre_line', 'cluster_id']).count().reset_index()
    fig, ax = plt.subplots(figsize=(4,4))
    ax.pie(counts[counts.cre_line==cre_line].cell_specimen_id.values,
           labels=np.sort(cluster_df.cluster_id.unique()), autopct='%1.1f%%')
    ax.set_title(cre_line)
# coding properties of clusters
def plot_coding_properties_of_clusters(cre_data, labels, rspm, cre_line, features_to_plot):
    """For each cluster within a cre line, heatmap the mean dropout per
    feature and session number.

    Note: reads the module-level `model_output_type` for the colorbar label.
    """
    cluster_ids = pd.DataFrame(index=cre_data.index, columns=['cluster_id'], data=labels)
    cre_rspm = rspm[rspm.cre_line==cre_line]
    # NOTE(review): membership is checked on rspm but the drop happens on
    # cre_rspm; equivalent here because cre_rspm is a slice of rspm.
    if 'cluster_id' in rspm.keys():
        cre_rspm = cre_rspm.drop(columns=['cluster_id'])
    cre_rspm = cre_rspm.merge(cluster_ids, on='cell_specimen_id')
    # name reused: now the sorted unique cluster ids, not the mapping frame
    cluster_ids = np.sort(cre_rspm.cluster_id.unique())
    n_rows = int(len(cluster_ids)/5.)
    fig, ax = plt.subplots(n_rows, 5, figsize=(10,2*n_rows), sharex=True, sharey=True)
    ax = ax.ravel()
    for i,cluster_id in enumerate(cluster_ids):
        mean_dropouts_for_cluster = cre_rspm[cre_rspm.cluster_id==cluster_id].groupby('session_number').mean()[features_to_plot]
        ax[i] = sns.heatmap(mean_dropouts_for_cluster, cmap='RdBu', vmin=-1, vmax=1, ax=ax[i], cbar=False, cbar_kws={'label':model_output_type})
        ax[i].set_title('cluster_id: '+str(cluster_id))
        # ax[i].set_
    fig.tight_layout()
# Ensure cell-level cre_line metadata is attached to `data`.
if 'cre_line' not in data.keys():
    data = data.merge(rspm[['cell_specimen_id','cre_line']], on='cell_specimen_id')
    data = data.drop_duplicates(subset='cell_specimen_id')
print(len(data))
features_for_pca = df.keys()
# elbow method plots
# Run PCA per cre line, then sweep KMeans k and plot inertia ('distortion').
for cre_line in data.cre_line.unique():
    cre_data = data[data.cre_line==cre_line]
    cre_data = cre_data.dropna()
    n_features = len(features_for_pca)
    n_components = len(features_for_pca)
    pca = PCA(n_components=n_components)
    pca_result = pca.fit_transform(cre_data[features_for_pca].values)
    distortions = []
    K = range(1,20)
    for k in K:
        kmeans, labels = cluster_pca_result_kmeans(pca_result, n_clusters=k)
        distortions.append(kmeans.inertia_)
    fig, ax = plt.subplots(figsize = (8,4))
    ax.plot(K, distortions, 'bx-')
    ax.set_xlabel('k')
    ax.set_ylabel('Distortion')
    ax.set_title(cre_line)
# Full per-cre-line pipeline: PCA, diagnostic plots, KMeans clustering,
# and per-cluster summary plots; accumulates cluster assignments.
full_cluster_df = pd.DataFrame()
for cre_line in data.cre_line.unique():
    print(cre_line)
    cre_data = data[data.cre_line==cre_line]
    cre_data = cre_data.dropna()
    n_features = len(features_for_pca)
    n_components = len(features_for_pca)
    pca = PCA(n_components=n_components)
    pca_result = pca.fit_transform(cre_data[features_for_pca].values)
    # keep the first 3 PC projections on the dataframe for scatter plots
    cre_data['pc1'] = pca_result[:,0]
    cre_data['pc2'] = pca_result[:,1]
    cre_data['pc3'] = pca_result[:,2]
    n_pcs_95_pct = np.searchsorted(np.cumsum(pca.explained_variance_ratio_), .95)
    print(n_pcs_95_pct, 'PCs explain 95% of the variance')
    pca_result = pca_result[:,:n_pcs_95_pct]
    # plot input matrix
    plot_input_to_PCA(cre_data[features_for_pca], cre_line, features_for_pca, model_output_type)
    # variance explained plot
    plot_PCA_var_explained(pca, n_components, cre_line)
    # plot covariance matrix
    plot_covariance_matrix(pca, features_for_pca, n_features)
    # PCs by features heatmap
    plot_PCs_by_features(pca, n_components, features_for_pca, cre_line, n_pcs_95_pct)
    # data in PC space
    pca_result_df = pd.DataFrame(pca_result, index=cre_data.index, columns = ['PC'+str(i) for i in range(n_pcs_95_pct)])
    plot_data_in_PC_space(pca_result_df, cre_line)
    # plot PCA weights by cells
    plot_PCA_result_heatmap(pca_result, cre_line)
    # cluster and plot PCA weights by clusters
    kmeans, labels = cluster_pca_result_kmeans(pca_result, n_clusters=10)
    plot_sorted_PC_weights(pca_result, labels)
    # index by cell id so create_cluster_df can merge on it
    # (no-op if cell_specimen_id is already the index)
    try:
        cre_data = cre_data.set_index('cell_specimen_id')
    except:
        pass
    # plot cluster breakdown within cre line
    cluster_df = create_cluster_df(cre_data, rspm, labels)
    full_cluster_df = pd.concat([full_cluster_df, cluster_df])
    plot_cluster_pie_for_cre_line(cluster_df, cre_line)
    # plot coding properties for clusters
    plot_coding_properties_of_clusters(cre_data, labels, rspm, cre_line, features_to_plot)
len(full_cluster_df)
len(rspm.cell_specimen_id.unique())
# Load omission-aligned response dataframes across sessions.
experiments_table = loading.get_filtered_ophys_experiment_table(release_data_only=True)
cache_dir = loading.get_analysis_cache_dir()
df_name = 'omission_response_df'
conditions = ['cell_specimen_id']
use_events = False
multi_session_df = loading.get_multi_session_df(cache_dir, df_name, conditions, experiments_table,
                                                remove_outliers=True, use_events=use_events)
multi_session_df = multi_session_df.reset_index().drop(columns='index')
# keep only experiments present in the filtered experiments table
multi_session_df = multi_session_df[multi_session_df.ophys_experiment_id.isin(experiments_table.index)]
# remove novel session retakes
indices = multi_session_df[(multi_session_df.session_number==4)&(multi_session_df.prior_exposures_to_image_set!=0)].index
multi_session_df = multi_session_df.drop(index=indices)
multi_session_df = multi_session_df.reset_index(drop=True)
omission_response_df = multi_session_df.copy()
# indices = [index for index in multi_session_df.index if len(multi_session_df.iloc[index].mean_trace) == 37]
# multi_session_df = multi_session_df.loc[indices]
def plot_mean_trace(traces, timestamps, ylabel='dF/F', legend_label=None, color='k', interval_sec=1, xlim_seconds=[-2,2],
                    plot_sem=True, ax=None):
    """Plot the mean of a set of traces with an optional +/- SEM shaded band.

    Parameters
    ----------
    traces : 2d array-like, trials/cells x timepoints
    timestamps : 1d array of times matching traces' second axis
    ylabel : str, y-axis label
    legend_label : label passed to ax.plot
    color : line/band color
    interval_sec : x-tick spacing in seconds
    xlim_seconds : [min, max] x-axis limits
    plot_sem : whether to shade the +/- SEM band
    ax : matplotlib Axes, created if None

    Returns
    -------
    matplotlib Axes
    """
    if ax is None:
        fig, ax = plt.subplots()
    if len(traces) > 0:
        traces = np.asarray(traces)
        trace = np.mean(traces, axis=0)
        # FIX: std was taken over the flattened array (a scalar), which drew a
        # constant-width band; compute the per-timepoint SEM instead.
        sem = traces.std(axis=0) / np.sqrt(float(len(traces)))
        ax.plot(timestamps, trace, label=legend_label, linewidth=2, color=color)
        if plot_sem:
            ax.fill_between(timestamps, trace + sem, trace - sem, alpha=0.5, color=color)
    ax.set_xticks(np.arange(int(timestamps[0]), int(timestamps[-1]), interval_sec))
    ax.set_xlim(xlim_seconds)
    ax.set_xlabel('time (sec)')
    ax.set_ylabel(ylabel)
    sns.despine(ax=ax)
    return ax
def plot_population_average_for_condition(multi_session_df, timestamps, condition_name, project_codes, data_type,
                                          change=True, omitted=False, colors=None, ax=None, title='title',
                                          xlim_seconds=None, save_dir=None, folder=None):
    """Plot population-average traces, one line per value of `condition_name`.

    Relies on the module-level `utils` (visual_behavior.visualization.utils)
    for default colors, flash overlays, and figure saving.
    Assumes `colors` (or the default pair) has at least one entry per unique
    condition value -- TODO confirm at call sites.
    Returns the matplotlib Axes.
    """
    if colors is None:
        c = utils.get_colors_for_session_numbers()
        colors = [c[0], c[3]]
    sdf = multi_session_df.copy()
    sdf = sdf[sdf.project_code.isin(project_codes)]
    # remove traces with incorrect length - why does this happen?
    sdf = sdf.reset_index(drop=True)
    indices = [index for index in sdf.index if len(sdf.iloc[index].mean_trace) == len(sdf.mean_trace.values[0])]
    sdf = sdf.loc[indices]
    if xlim_seconds is None:
        xlim_seconds = [timestamps[0], timestamps[-1]]
    # y-axis label depends on the kind of signal being plotted
    if 'events' in data_type:
        ylabel = 'response'
    elif 'dFF' in data_type:
        ylabel = 'dF/F'
    elif 'pupil_area' in data_type:
        ylabel = 'pupil area (pix^2)'
    elif 'running' in data_type:
        ylabel = 'running speed (cm/s)'
    # project_code is only used to build the saved-figure filename below
    if len(project_codes) == 1:
        project_code = project_codes[0]
    elif len(project_codes) == 2:
        project_code = 'Scientifica'
    else:
        project_code = 'all'
    # finer x-ticks for short windows
    if xlim_seconds[0]<=1:
        interval_sec=0.5
    else:
        interval_sec=1
    if ax is None:
        figsize = (6,4)
        fig, ax = plt.subplots(figsize=figsize, sharey=False)
    # for i,cre_line in enumerate(np.sort(sdf.cre_line.unique())):
    # one mean trace per condition value
    for c, condition in enumerate(np.sort(sdf[condition_name].unique())):
        traces = sdf[(sdf[condition_name]==condition)].mean_trace.values
        # traces = [trace for trace in traces if np.amax(trace) < 4]
        ax = plot_mean_trace(np.asarray(traces), timestamps, ylabel=ylabel,
                             legend_label=condition, color=colors[c], interval_sec=interval_sec,
                             xlim_seconds=xlim_seconds, ax=ax)
    # overlay stimulus flash / omission markers on the trace
    ax = utils.plot_flashes_on_trace(ax, timestamps, change=change, omitted=omitted)
    ax.axvline(x=0, ymin=0, ymax=1, linestyle='--', color='gray')
    ax.set_title(title)
    # ax[i].get_legend().remove()
    ax.set_xlim(xlim_seconds)
    ax.set_xlabel('time relative to omission (sec)')
    # ax[i].legend(loc='top left', title=condition_name, fontsize='x-small', title_fontsize='x-small')
    ax.set_ylabel(ylabel)
    # plt.suptitle('population average omission response', x=0.5, y=1.03, fontsize=18)
    if save_dir:
        fig.tight_layout()
        fig_title = data_type+'_'+project_code[14:]+condition_name+'_by_cre_line'
        utils.save_figure(fig, figsize, save_dir, folder, fig_title)
    return ax
# Plot cluster-wise population-average omission responses by session number.
change = False
omitted = True
folder = 'omissions'
# grab one example experiment from each rig type
multiscope_expt = experiments_table[experiments_table.project_code=='VisualBehaviorMultiscope'].index.values[9]
scientifica_expt = experiments_table[experiments_table.project_code=='VisualBehavior'].index.values[9]
experiment_id = scientifica_expt
from visual_behavior.ophys.response_analysis.response_analysis import ResponseAnalysis
dataset = loading.get_ophys_dataset(experiment_id)
# dataset = cache.get_behavior_ophys_experiment(scientifica_expt)
analysis = ResponseAnalysis(dataset)
# use this experiment's omission response timestamps as the common time base
tmp = analysis.get_response_df(df_name='omission_response_df')
scientifica_timestamps = tmp.trace_timestamps.values[0]
print(len(scientifica_timestamps))
import visual_behavior.visualization.utils as utils
save_dir = r'\\allen\programs\braintv\workgroups\nc-ophys\visual_behavior\summary_plots\population_plots\population_averages'
data_type = 'dFF_all_stim_clusters'
project_codes = ['VisualBehavior', 'VisualBehaviorTask1B']
timestamps = scientifica_timestamps
condition_name = 'session_number'
xlim_seconds = [-1,2]
sdf = multi_session_df.copy()
sdf = sdf[sdf.project_code.isin(project_codes)]
colors = utils.get_colors_for_session_numbers()
# one figure per cre line, one panel per cluster
for c,cre_line in enumerate(np.sort(full_cluster_df.cre_line.unique())):
    fig, ax = plt.subplots(2, 5, figsize=(12,4), sharex=True, sharey=True)
    ax = ax.ravel()
    for i, cluster_id in enumerate(np.sort(full_cluster_df.cluster_id.unique())):
        cell_specimen_ids = full_cluster_df[(full_cluster_df.cre_line==cre_line)&(full_cluster_df.cluster_id==cluster_id)].cell_specimen_id.unique()
        tmp = sdf[(sdf.cell_specimen_id.isin(cell_specimen_ids))]
        ax[i] = plot_population_average_for_condition(tmp, timestamps, condition_name, project_codes, data_type,
                                                      change=change, omitted=omitted, colors=colors, ax=ax[i],
                                                      title = 'cluster_id: '+str(cluster_id),
                                                      xlim_seconds=xlim_seconds, save_dir=None, folder=folder)
    plt.suptitle(cre_line, x=0.52, y=1.02, fontsize=16)
    fig.tight_layout()